
* ------------------------------------------------------------------
* Award-level file: first year, last year and duration of each award,
* one row per (submit_university, unique_award).
* Output: $temp\award_duration
* ------------------------------------------------------------------
use "$rawdata\core_award2019q1.dta", clear
rename institution_id submit_university

* discard rows with no recipient or no usable award identifier
drop if recipient=="" | inlist(unique_award, "", "#N/A")

* first four characters of the period strings, converted to numbers
* (presumably the calendar year -- confirm the date format of period_start/period_end)
generate year_end = substr(period_end, 1, 4)
generate year_start = substr(period_start, 1, 4)
destring year*, replace

* earliest start and latest end observed for each award
collapse (min) year_start (max) year_end, by(submit_university unique_award)
generate duration = year_end - year_start
compress

* keep awards whose first year lies in 2001-2018 (a missing year_start is dropped too,
* because missing compares greater than 2018)
drop if year_start <= 2000 | year_start > 2018

* duration is blanked when the last observed year is 2018 or later
replace duration = . if year_end >= 2018

save "$temp\award_duration", replace

* ------------------------------------------------------------------
* Employee-year measures of how far along the supporting awards are:
*   years_since_start : funding-weighted (year - year_start)
*   frac_duration     : funding-weighted (year - year_start) / duration
* Output: $data\employee_duration.dta, keyed by
*         iris_employee_number, submit_university, year
* ------------------------------------------------------------------
use "$rawdata\core_employee2019q1.dta", clear
rename (emp_num institution_id) (new_emp submit_university)

* one row per employee-award-period, keeping the largest fte and capping it at 1
collapse (max) fte, by(new_emp submit_university unique_award period_start period_end)
replace fte = 1 if fte > 1 & !missing(fte)

* bring in the funding source and keep federally funded rows only
merge 1:1 new_emp submit_university unique_award period_start period_end using "$data\employee_cleaned", keep(master match) keepusing(funding_type) nogenerate
keep if funding_type == "FEDERAL"

generate year = substr(period_end, 1, 4)
destring year, replace

* total fte on each award-university-year
* NOTE(review): this denominator is summed over the whole year, while total_direct
* below is merged at the period level -- confirm that is the intended allocation base.
bysort unique_award submit_university year: egen total_fte = total(fte)

* allocate the period's direct cost to employees in proportion to fte
merge m:1 period_start period_end submit_university unique_award using "$temp\temp_award_amount_period", keep(master match) nogenerate
replace total_direct = total_direct/total_fte*fte

collapse (sum) total_direct, by(new_emp submit_university unique_award year)

* employee's total allocated funding across awards in the year
bysort new_emp submit_university year: egen total_funding = total(total_direct)

merge m:1 submit_university unique_award using "$temp\award_duration", keep(master match) nogenerate

* years elapsed since the award's first year; negative values are left missing
generate elapsed_years = year - year_start if year >= year_start

* elapsed share of the award duration; anything outside [0,1] is set to missing
generate elapsed_share = elapsed_years / duration
replace elapsed_share = . if !inrange(elapsed_share, 0, 1)

* weight of each award in the employee's funding for the year
generate funding_weight = total_direct / total_funding

* weighted averages over awards with a non-missing measure
* (egen total() counts missing products as zero, hence the explicit weight sums)
bysort new_emp submit_university year: egen wsum_years = total(elapsed_years*funding_weight)
bysort new_emp submit_university year: egen wgt_years = total((elapsed_years!=.)*funding_weight)
generate years_since_start = wsum_years/wgt_years

bysort new_emp submit_university year: egen wsum_share = total(elapsed_share*funding_weight)
bysort new_emp submit_university year: egen wgt_share = total((elapsed_share!=.)*funding_weight)
generate frac_duration = wsum_share/wgt_share

* with a single award in the employee-year, fall back to that award's own values
* when the weighted average could not be computed
bysort new_emp submit_university year: generate n_awards = _N
replace years_since_start = elapsed_years if years_since_start==. & n_awards==1
replace frac_duration = elapsed_share if frac_duration==. & n_awards==1

* reduce to one row per employee-university-year
collapse (mean) years_since_start frac_duration, by(new_emp submit_university year)

rename new_emp iris_employee_number

save "$data\employee_duration.dta", replace

* ------------------------------------------------------------------
* regression
* Build the estimation panel: attach employee_duration, impute the
* duration measures, create event-time indicators, outcome transforms,
* PI identifiers and the citation/quality splits, then declare the panel.
* ------------------------------------------------------------------

use "$data/regression_sample", clear

merge 1:1 iris_employee_number submit_u year using "$data\employee_duration.dta", keep(1 3) nogen

* Imputed versions: a missing value becomes the previous row's value within employee
* plus a fixed step (+1 year, +0.1 of the duration). replace works row by row, so
* consecutive missing rows keep accumulating; an employee's first row stays missing.
* NOTE(review): the step is per row, not per calendar year -- if an employee's years
* are not consecutive the imputed value understates the elapsed time. Confirm.
sort iris year
gen years_since_start_imputed = years_since_start
by iris: replace years_since_start_imputed = years_since_start_imputed[_n-1] + 1 if years_since_start_imputed==.
gen frac_duration_imputed = frac_duration
by iris: replace frac_duration_imputed = frac_duration_imputed[_n-1] + 0.1 if frac_duration_imputed==.
replace frac_duration_imputed=1 if frac_duration_imputed>1 & frac_duration_imputed<.

* only employees found in the shocks file are kept
merge m:1 iris using "$data/matching/employee_shocks", keep(3) nogen

* event time relative to year_shock; observations with treated==0 are pinned at tau = -1
gen tau=year-year_shock
replace tau=-1 if treated==0

* one indicator per event year: periodm13 ... periodm1 for tau = -13 ... -1
forv i=13(-1)1 {
	gen byte periodm`i'=(tau==-`i')
}

* period0 ... period15 for tau = 0 ... 15
forv i=0/15 {
	gen byte period`i' = (tau==`i')
}

* binned end points and the post indicator.
* Stata orders missing above every number, so an unguarded tau>=5 or tau>0 would be
* coded 1 whenever tau is missing. The "& tau<." guard (the same idiom used for
* frac_duration_imputed above) codes those rows 0 instead. The <= comparisons are
* already false for missing tau and need no guard.
gen periodm5b=(tau<=-5)
gen periodm7b=(tau<=-7)
gen period5a=(tau>=5 & tau<.)
gen period7a=(tau>=7 & tau<.)
gen byte post=(tau>0 & tau<.)

* first year with a non-missing log_expenditure, per employee
gen temp=year if log_expenditure!=.
cap drop minyear
bysort iris: egen minyear=min(temp)

* number of rows with a non-missing log_expenditure, per emp_num
bysort emp_num: egen nyear=sum(log_expenditure!=.)

gen log_expenditure_federal=log(1+total_direct_federal)
gen log_expenditure_private=log(1+total_direct_private)

* PI identifier: carry the last known pinumber forward within emp_num,
* then set whatever is still missing to 0
merge 1:1 iris year using "$data\pi", keep(1 3) nogen
sort emp_num year
replace pinumber=pinumber[_n-1] if emp_num==emp_num[_n-1] & pinumber==.
replace pinumber=0 if pinumber==.

*by citations
* complements of the "high" counts, so that high + low = total
gen npatent_highcitation=npatent_quartile3+npatent_quartile4
gen npatent_lowcitation=npatent-npatent_highcitation
gen npatent_loworiginality=npatent-npatent_highoriginality
gen npatent_lowgenerality=npatent-npatent_highgenerality
gen pub=(npub>0)
gen npub_lowjif=npub-npub_highjif
gen npub_lowcite1=npub-npub_highcite1
gen npub_lowcite2=npub-npub_highcite2

tsset emp_num year

*************************
**define treatment*******
*************************

* For every emp_num, pick up the duration measures from the tau==-1 row(s) with
* treat switched on and spread them to all rows of that employee:
*   years_since1 <- years_since_start          years_since2 <- years_since_start_imputed
*   frac1        <- frac_duration              frac2        <- frac_duration_imputed
* temp1-temp4 count the non-missing source rows that entered each sum; when that
* count is 0 the result is set to missing rather than left at the egen total of 0.
* (assumes treat is a 0/1 indicator -- confirm)
local targets years_since1 years_since2 frac1 frac2
local sources years_since_start years_since_start_imputed frac_duration frac_duration_imputed

forvalues k = 1/4 {
	local outvar : word `k' of `targets'
	local srcvar : word `k' of `sources'

	bysort emp_num: egen `outvar' = total((tau==-1)*treat*`srcvar')
	bysort emp_num: egen temp`k' = total((tau==-1)*treat*(`srcvar'!=.))
	replace `outvar' = . if temp`k'==0
}

* split post into two pieces:
*   post_expire = post for employees whose imputed years-since-start is known and >= 2
*   post_new    = the remainder of post
gen post_expire = post*(years_since2>=2 & !missing(years_since2))
gen post_new = post - post_expire

* export the split for use elsewhere, leaving the data in memory untouched
* NOTE(review): this file goes to the current working directory, unlike the other
* saves in this script, which write under $data or $temp -- confirm that is intended.
preserve

keep iris_employee_number submit_university year post years_since2 post_expire post_new
save grant_expiration, replace

restore

* ------------------------------------------------------------------
* Post-period regressions split by post_expire.
* Four specifications, written to expire.csv in this order:
*   1. post_new post_expire, employee fixed effects
*   2. post_new post_expire, pinumber fixed effects
*   3. post     post_expire, employee fixed effects
*   4. post     post_expire, pinumber fixed effects
* Every specification also absorbs year x university x field cells and clusters
* on emp_num. The sample is include==1 with tau^2<50 (|tau| <= 7 for integer tau).
* The first table replaces the csv, the following three are appended.
* ------------------------------------------------------------------
local expire_mode replace

foreach rhs in "post_new post_expire" "post post_expire" {
	foreach unitfe in emp_num pinumber {

		eststo clear

		foreach depvar in log_expenditure_federal patent npatent pub npub {
			eststo: reghdfe `depvar' `rhs' if tau^2<50 & include==1, a(i.year#i.submit_u#i.fieldid i.`unitfe') vce(cluster emp_num)
		}

		esttab using "$data\results\pretrend\march2021\expire.csv", `expire_mode' numbers label nogaps r2 ar2 star(* 0.1 ** 0.05 *** 0.01) b(%8.4f) se(%8.4f)

		* every table after the first is appended to the same file
		local expire_mode append
	}
}

/*

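* NOTE: the regressions below refer to old2, but this block only creates old.
* Before re-enabling, either rename old to old2 here or confirm old2 exists in the data.
gen old = ( years_since_start_imputed >=2)
replace old=. if years_since_start_imputed==.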

reg npub old2 if treat==0 & years_since_start<., cluster(emp_num)
reghdfe npub old2 if treat==0 & years_since_start<., a(i.year#i.submit_u#i.fieldid) vce(cluster emp_num)
reg npatent old2 if treat==0 & years_since_start<., cluster(emp_num)
reghdfe npatent old2 if treat==0 & years_since_start<., a(i.year#i.submit_u#i.fieldid) vce(cluster emp_num)




